Concordance

Find the ten most commonly used words in a text file.
from re import sub

def concordance(text):
    """Count how often each word occurs in ``text``.

    The text is split on whitespace. Each token is lower-cased and every
    character that is not a regex word character (letters, digits and
    underscore) is removed, so ``"Heaven,"`` and ``"heaven"`` are counted
    as the same word. Tokens that contain no word characters at all
    (for example ``"--"``) are ignored rather than counted as an empty word.

    Args:
        text: The string to analyse.

    Returns:
        A dict mapping each normalised word to its number of occurrences.
        An empty or punctuation-only ``text`` yields an empty dict.
    """
    freq = {}
    for token in text.split():
        # Raw string so the backslash-w reaches the regex engine unchanged.
        word = sub(r'[^\w]', '', token.lower())
        if not word:
            # The token was pure punctuation; it is not a word.
            continue
        freq[word] = freq.get(word, 0) + 1
    return freq

# Read the whole text file into memory and build its word-frequency table.
txt_file_name = 'paradise-lost.txt'
with open(txt_file_name) as f:
    text = f.read()
freq = concordance(text)

# Ten most frequent (word, count) pairs, highest count first. Negating the
# count sorts descending while keeping ties in their original order.
sorted(freq.items(), key=lambda kv: -kv[1])[:10]

Output:

[('and', 3483),
 ('the', 3162),
 ('to', 2326),
 ('of', 2186),
 ('in', 1430),
 ('with', 1208),
 ('his', 1181),
 ('or', 795),
 ('that', 720),
 ('all', 712)]